import requests
import re
import csv
# result = requests.get('https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2005378.txt')
# result = requests.get('https://www.qschina.cn/university-rankings/university-subject-rankings/2020/communication-media-studies')

# 分而治之，将复杂的问题，分解为简单的小问题，最后组合在一起。
# 将不同功能分解到不同的函数中。
# 及时测试。按照功能、函数及时测试新增的代码。


# Build the data-file URL for every ranking year.
def urls(start, end, timeout=10):
    """Map each year in ``start..end`` (inclusive) to its ranking data URL.

    For every year the subject-ranking page is downloaded and its HTML is
    handed to ``getUrl``, which extracts the address of the data file.

    Args:
        start: First year to look up.
        end: Last year to look up (inclusive).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict whose keys are the years (int) and whose values are the
        URLs returned by ``getUrl`` for that year's page.

    Raises:
        requests.HTTPError: If a ranking page answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    base = 'https://www.qschina.cn/university-rankings/university-subject-rankings/'
    resource = {}
    for year in range(start, end + 1):
        url = base + str(year) + '/communication-media-studies'
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=timeout)
        # Stop on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        resource[year] = getUrl(response.text)
    return resource

# Extract the data-file URL from a ranking page.
def getUrl(content):
    """Return the ranking data-file URL referenced in *content*.

    Searches *content* for the first ``https:...<digits>.txt"`` reference,
    takes the numeric file id and rebuilds the URL under the site's
    ``qs-rankings-data/cn`` directory.

    Args:
        content: Text (HTML) of a ranking page.

    Returns:
        The URL of the ``<id>.txt`` data file as a string.

    Raises:
        ValueError: If *content* holds no such ``.txt`` reference.
    """
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    # "\d+" (not "\d*") so a ".txt" link without a numeric id is skipped
    # rather than producing a URL with an empty file name.
    match = re.search(r'https:.*?(\d+)\.txt"', content)
    if match is None:
        raise ValueError('no ranking data file (.txt) URL found in page content')
    return 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/' + match.group(1) + '.txt'

# print(getUrl(result.text))

# fo = open("./22/results.txt", "w",encoding='utf-8')
# fo.write(result.text.encode('utf-8').decode('unicode_escape'))
# fo.close()

# results = result.json()['data']

# # print(type(results))

# Extract the university name from a title fragment.
def getTitle(content):
    """Return the text of the first ``<a ...>...</a>`` element in *content*.

    The match is non-greedy, so only the text up to the nearest closing
    ``</a>`` is returned. ``.`` does not cross newlines here, so the whole
    element must sit on one line.

    Args:
        content: HTML fragment containing an anchor element.

    Returns:
        The text between the opening ``<a ...>`` tag and ``</a>``.

    Raises:
        AttributeError: If *content* contains no matching anchor element.
    """
    anchor_re = re.compile('<a.*?>(.*?)<\\/a>')
    found = anchor_re.search(content)
    link_text = found.group(1)
    return link_text

# # # print(getTitle(result.json()['data'][0]['title']))


# Resolve the data-file URL for each ranking year (performs HTTP requests).
resources = urls(2017,2020)

# Write one CSV row per entry per year: name, score and the year it is from.
with open('names.csv', 'w', newline='',encoding='utf-8') as csvfile:
    fieldnames = ['name', 'value','date']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for year, data_url in resources.items():
        # A timeout keeps a stalled connection from hanging the script.
        results = requests.get(data_url, timeout=30)
        # Report HTTP errors directly instead of as a JSON decoding failure.
        results.raise_for_status()
        for item in results.json()['data']:
            writer.writerow({
                'name': getTitle(item['title']),
                'value': item['score'],
                'date': year,
            })
    # No explicit close(): the with-statement closes the file on exit.